Preparation of input files

The input files for this analysis are .mirna format files that are generated through miraligner. PCR duplicates are removed using seqcluster.

Library loading and set up

suppressMessages(
  c(library(isomiRs),
    library(DESeq2),
    library(tximport),
    library(pheatmap),
    library(gridExtra),
    library(grid),
    library(ggplot2),
    library(lattice),
    library(reshape),
    library(mixOmics),
    library(gplots),
    library(RColorBrewer),
    library(readr),
    library(dplyr),
    library(data.table),
    library(tidyverse),
    library(rtracklayer),
    library(Biostrings),
    library(Rsubread),
    library(ggrepel),
    library(CLIPanalyze),
    library(plotly)
   )
)
##   [1] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
##   [4] "matrixStats"          "Biobase"              "GenomicRanges"       
##   [7] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [10] "BiocGenerics"         "parallel"             "stats4"              
##  [13] "DiscriMiner"          "stats"                "graphics"            
##  [16] "grDevices"            "utils"                "datasets"            
##  [19] "methods"              "base"                 "DESeq2"              
##  [22] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
##  [25] "matrixStats"          "Biobase"              "GenomicRanges"       
##  [28] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [31] "BiocGenerics"         "parallel"             "stats4"              
##  [34] "DiscriMiner"          "stats"                "graphics"            
##  [37] "grDevices"            "utils"                "datasets"            
##  [40] "methods"              "base"                 "tximport"            
##  [43] "DESeq2"               "isomiRs"              "SummarizedExperiment"
##  [46] "DelayedArray"         "matrixStats"          "Biobase"             
##  [49] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
##  [52] "S4Vectors"            "BiocGenerics"         "parallel"            
##  [55] "stats4"               "DiscriMiner"          "stats"               
##  [58] "graphics"             "grDevices"            "utils"               
##  [61] "datasets"             "methods"              "base"                
##  [64] "pheatmap"             "tximport"             "DESeq2"              
##  [67] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
##  [70] "matrixStats"          "Biobase"              "GenomicRanges"       
##  [73] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
##  [76] "BiocGenerics"         "parallel"             "stats4"              
##  [79] "DiscriMiner"          "stats"                "graphics"            
##  [82] "grDevices"            "utils"                "datasets"            
##  [85] "methods"              "base"                 "gridExtra"           
##  [88] "pheatmap"             "tximport"             "DESeq2"              
##  [91] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
##  [94] "matrixStats"          "Biobase"              "GenomicRanges"       
##  [97] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [100] "BiocGenerics"         "parallel"             "stats4"              
## [103] "DiscriMiner"          "stats"                "graphics"            
## [106] "grDevices"            "utils"                "datasets"            
## [109] "methods"              "base"                 "grid"                
## [112] "gridExtra"            "pheatmap"             "tximport"            
## [115] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [118] "DelayedArray"         "matrixStats"          "Biobase"             
## [121] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [124] "S4Vectors"            "BiocGenerics"         "parallel"            
## [127] "stats4"               "DiscriMiner"          "stats"               
## [130] "graphics"             "grDevices"            "utils"               
## [133] "datasets"             "methods"              "base"                
## [136] "ggplot2"              "grid"                 "gridExtra"           
## [139] "pheatmap"             "tximport"             "DESeq2"              
## [142] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [145] "matrixStats"          "Biobase"              "GenomicRanges"       
## [148] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [151] "BiocGenerics"         "parallel"             "stats4"              
## [154] "DiscriMiner"          "stats"                "graphics"            
## [157] "grDevices"            "utils"                "datasets"            
## [160] "methods"              "base"                 "lattice"             
## [163] "ggplot2"              "grid"                 "gridExtra"           
## [166] "pheatmap"             "tximport"             "DESeq2"              
## [169] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [172] "matrixStats"          "Biobase"              "GenomicRanges"       
## [175] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [178] "BiocGenerics"         "parallel"             "stats4"              
## [181] "DiscriMiner"          "stats"                "graphics"            
## [184] "grDevices"            "utils"                "datasets"            
## [187] "methods"              "base"                 "reshape"             
## [190] "lattice"              "ggplot2"              "grid"                
## [193] "gridExtra"            "pheatmap"             "tximport"            
## [196] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [199] "DelayedArray"         "matrixStats"          "Biobase"             
## [202] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [205] "S4Vectors"            "BiocGenerics"         "parallel"            
## [208] "stats4"               "DiscriMiner"          "stats"               
## [211] "graphics"             "grDevices"            "utils"               
## [214] "datasets"             "methods"              "base"                
## [217] "mixOmics"             "MASS"                 "reshape"             
## [220] "lattice"              "ggplot2"              "grid"                
## [223] "gridExtra"            "pheatmap"             "tximport"            
## [226] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [229] "DelayedArray"         "matrixStats"          "Biobase"             
## [232] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [235] "S4Vectors"            "BiocGenerics"         "parallel"            
## [238] "stats4"               "DiscriMiner"          "stats"               
## [241] "graphics"             "grDevices"            "utils"               
## [244] "datasets"             "methods"              "base"                
## [247] "gplots"               "mixOmics"             "MASS"                
## [250] "reshape"              "lattice"              "ggplot2"             
## [253] "grid"                 "gridExtra"            "pheatmap"            
## [256] "tximport"             "DESeq2"               "isomiRs"             
## [259] "SummarizedExperiment" "DelayedArray"         "matrixStats"         
## [262] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [265] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [268] "parallel"             "stats4"               "DiscriMiner"         
## [271] "stats"                "graphics"             "grDevices"           
## [274] "utils"                "datasets"             "methods"             
## [277] "base"                 "RColorBrewer"         "gplots"              
## [280] "mixOmics"             "MASS"                 "reshape"             
## [283] "lattice"              "ggplot2"              "grid"                
## [286] "gridExtra"            "pheatmap"             "tximport"            
## [289] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [292] "DelayedArray"         "matrixStats"          "Biobase"             
## [295] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [298] "S4Vectors"            "BiocGenerics"         "parallel"            
## [301] "stats4"               "DiscriMiner"          "stats"               
## [304] "graphics"             "grDevices"            "utils"               
## [307] "datasets"             "methods"              "base"                
## [310] "readr"                "RColorBrewer"         "gplots"              
## [313] "mixOmics"             "MASS"                 "reshape"             
## [316] "lattice"              "ggplot2"              "grid"                
## [319] "gridExtra"            "pheatmap"             "tximport"            
## [322] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [325] "DelayedArray"         "matrixStats"          "Biobase"             
## [328] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [331] "S4Vectors"            "BiocGenerics"         "parallel"            
## [334] "stats4"               "DiscriMiner"          "stats"               
## [337] "graphics"             "grDevices"            "utils"               
## [340] "datasets"             "methods"              "base"                
## [343] "dplyr"                "readr"                "RColorBrewer"        
## [346] "gplots"               "mixOmics"             "MASS"                
## [349] "reshape"              "lattice"              "ggplot2"             
## [352] "grid"                 "gridExtra"            "pheatmap"            
## [355] "tximport"             "DESeq2"               "isomiRs"             
## [358] "SummarizedExperiment" "DelayedArray"         "matrixStats"         
## [361] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [364] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [367] "parallel"             "stats4"               "DiscriMiner"         
## [370] "stats"                "graphics"             "grDevices"           
## [373] "utils"                "datasets"             "methods"             
## [376] "base"                 "data.table"           "dplyr"               
## [379] "readr"                "RColorBrewer"         "gplots"              
## [382] "mixOmics"             "MASS"                 "reshape"             
## [385] "lattice"              "ggplot2"              "grid"                
## [388] "gridExtra"            "pheatmap"             "tximport"            
## [391] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [394] "DelayedArray"         "matrixStats"          "Biobase"             
## [397] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [400] "S4Vectors"            "BiocGenerics"         "parallel"            
## [403] "stats4"               "DiscriMiner"          "stats"               
## [406] "graphics"             "grDevices"            "utils"               
## [409] "datasets"             "methods"              "base"                
## [412] "forcats"              "stringr"              "purrr"               
## [415] "tidyr"                "tibble"               "tidyverse"           
## [418] "data.table"           "dplyr"                "readr"               
## [421] "RColorBrewer"         "gplots"               "mixOmics"            
## [424] "MASS"                 "reshape"              "lattice"             
## [427] "ggplot2"              "grid"                 "gridExtra"           
## [430] "pheatmap"             "tximport"             "DESeq2"              
## [433] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [436] "matrixStats"          "Biobase"              "GenomicRanges"       
## [439] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [442] "BiocGenerics"         "parallel"             "stats4"              
## [445] "DiscriMiner"          "stats"                "graphics"            
## [448] "grDevices"            "utils"                "datasets"            
## [451] "methods"              "base"                 "rtracklayer"         
## [454] "forcats"              "stringr"              "purrr"               
## [457] "tidyr"                "tibble"               "tidyverse"           
## [460] "data.table"           "dplyr"                "readr"               
## [463] "RColorBrewer"         "gplots"               "mixOmics"            
## [466] "MASS"                 "reshape"              "lattice"             
## [469] "ggplot2"              "grid"                 "gridExtra"           
## [472] "pheatmap"             "tximport"             "DESeq2"              
## [475] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [478] "matrixStats"          "Biobase"              "GenomicRanges"       
## [481] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [484] "BiocGenerics"         "parallel"             "stats4"              
## [487] "DiscriMiner"          "stats"                "graphics"            
## [490] "grDevices"            "utils"                "datasets"            
## [493] "methods"              "base"                 "Biostrings"          
## [496] "XVector"              "rtracklayer"          "forcats"             
## [499] "stringr"              "purrr"                "tidyr"               
## [502] "tibble"               "tidyverse"            "data.table"          
## [505] "dplyr"                "readr"                "RColorBrewer"        
## [508] "gplots"               "mixOmics"             "MASS"                
## [511] "reshape"              "lattice"              "ggplot2"             
## [514] "grid"                 "gridExtra"            "pheatmap"            
## [517] "tximport"             "DESeq2"               "isomiRs"             
## [520] "SummarizedExperiment" "DelayedArray"         "matrixStats"         
## [523] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [526] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [529] "parallel"             "stats4"               "DiscriMiner"         
## [532] "stats"                "graphics"             "grDevices"           
## [535] "utils"                "datasets"             "methods"             
## [538] "base"                 "Rsubread"             "Biostrings"          
## [541] "XVector"              "rtracklayer"          "forcats"             
## [544] "stringr"              "purrr"                "tidyr"               
## [547] "tibble"               "tidyverse"            "data.table"          
## [550] "dplyr"                "readr"                "RColorBrewer"        
## [553] "gplots"               "mixOmics"             "MASS"                
## [556] "reshape"              "lattice"              "ggplot2"             
## [559] "grid"                 "gridExtra"            "pheatmap"            
## [562] "tximport"             "DESeq2"               "isomiRs"             
## [565] "SummarizedExperiment" "DelayedArray"         "matrixStats"         
## [568] "Biobase"              "GenomicRanges"        "GenomeInfoDb"        
## [571] "IRanges"              "S4Vectors"            "BiocGenerics"        
## [574] "parallel"             "stats4"               "DiscriMiner"         
## [577] "stats"                "graphics"             "grDevices"           
## [580] "utils"                "datasets"             "methods"             
## [583] "base"                 "ggrepel"              "Rsubread"            
## [586] "Biostrings"           "XVector"              "rtracklayer"         
## [589] "forcats"              "stringr"              "purrr"               
## [592] "tidyr"                "tibble"               "tidyverse"           
## [595] "data.table"           "dplyr"                "readr"               
## [598] "RColorBrewer"         "gplots"               "mixOmics"            
## [601] "MASS"                 "reshape"              "lattice"             
## [604] "ggplot2"              "grid"                 "gridExtra"           
## [607] "pheatmap"             "tximport"             "DESeq2"              
## [610] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [613] "matrixStats"          "Biobase"              "GenomicRanges"       
## [616] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [619] "BiocGenerics"         "parallel"             "stats4"              
## [622] "DiscriMiner"          "stats"                "graphics"            
## [625] "grDevices"            "utils"                "datasets"            
## [628] "methods"              "base"                 "CLIPanalyze"         
## [631] "GenomicAlignments"    "Rsamtools"            "GenomicFeatures"     
## [634] "AnnotationDbi"        "plyr"                 "ggrepel"             
## [637] "Rsubread"             "Biostrings"           "XVector"             
## [640] "rtracklayer"          "forcats"              "stringr"             
## [643] "purrr"                "tidyr"                "tibble"              
## [646] "tidyverse"            "data.table"           "dplyr"               
## [649] "readr"                "RColorBrewer"         "gplots"              
## [652] "mixOmics"             "MASS"                 "reshape"             
## [655] "lattice"              "ggplot2"              "grid"                
## [658] "gridExtra"            "pheatmap"             "tximport"            
## [661] "DESeq2"               "isomiRs"              "SummarizedExperiment"
## [664] "DelayedArray"         "matrixStats"          "Biobase"             
## [667] "GenomicRanges"        "GenomeInfoDb"         "IRanges"             
## [670] "S4Vectors"            "BiocGenerics"         "parallel"            
## [673] "stats4"               "DiscriMiner"          "stats"               
## [676] "graphics"             "grDevices"            "utils"               
## [679] "datasets"             "methods"              "base"                
## [682] "plotly"               "CLIPanalyze"          "GenomicAlignments"   
## [685] "Rsamtools"            "GenomicFeatures"      "AnnotationDbi"       
## [688] "plyr"                 "ggrepel"              "Rsubread"            
## [691] "Biostrings"           "XVector"              "rtracklayer"         
## [694] "forcats"              "stringr"              "purrr"               
## [697] "tidyr"                "tibble"               "tidyverse"           
## [700] "data.table"           "dplyr"                "readr"               
## [703] "RColorBrewer"         "gplots"               "mixOmics"            
## [706] "MASS"                 "reshape"              "lattice"             
## [709] "ggplot2"              "grid"                 "gridExtra"           
## [712] "pheatmap"             "tximport"             "DESeq2"              
## [715] "isomiRs"              "SummarizedExperiment" "DelayedArray"        
## [718] "matrixStats"          "Biobase"              "GenomicRanges"       
## [721] "GenomeInfoDb"         "IRanges"              "S4Vectors"           
## [724] "BiocGenerics"         "parallel"             "stats4"              
## [727] "DiscriMiner"          "stats"                "graphics"            
## [730] "grDevices"            "utils"                "datasets"            
## [733] "methods"              "base"

Initial analysis

Compile input files into isomiRs

Set working directory to the folder that contains only gene count .mirna files

setwd("/Users/mizuhi/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/miRNA_Analysis/Gene Counts")
inDir = getwd()
mirFiles = list.files(inDir, pattern=".mirna$", full.names = TRUE)
basename(mirFiles)
## [1] "HF4_LIB044916_GEN00173025_R1_barcode.cutadapt.mirna" 
## [2] "HF5_LIB044916_GEN00173028_R1_barcode.cutadapt.mirna" 
## [3] "HF6_LIB044916_GEN00173031_R1_barcode.cutadapt.mirna" 
## [4] "HFK4_LIB044916_GEN00173034_R1_barcode.cutadapt.mirna"
## [5] "HFK5_LIB044916_GEN00173037_R1_barcode.cutadapt.mirna"
## [6] "HFK6_LIB044916_GEN00173040_R1_barcode.cutadapt.mirna"
isomiRsampletable <- data.frame(
  row.names = c('HF_4_miRNA','HF_5_miRNA','HF_6_miRNA','HFK_4_miRNA','HFK_5_miRNA', 'HFK_6_miRNA'),
  condition = c('control','control','control','experimental','experimental', 'experimental'),
  libType = c('single-end','single-end','single-end','single-end','single-end','single-end')
)

ids <- IsomirDataSeqFromFiles(mirFiles,
                              coldata = isomiRsampletable,
                              design = ~ condition)

miRNA general abundance and count matrix

dir.create("PDF_figure", showWarnings = FALSE)
isoPlot(ids, type="all")
## Ussing 775 isomiRs.

pdf("PDF_figure/isoPlot.pdf",
    width = 8,
    height = 6)
isoPlot(ids, type="all")
## Ussing 775 isomiRs.
# overview of the counts
head(counts(ids))
##               HF_4_miRNA HF_5_miRNA HF_6_miRNA HFK_4_miRNA HFK_5_miRNA
## mmu-let-7a-5p        822        667       1221         514         949
## mmu-let-7b-3p         35         40         24          18           9
## mmu-let-7b-5p       4907       4801       6739        2973        4093
## mmu-let-7c-5p       2599       2694       4019        1999        2945
## mmu-let-7d-3p       1203       1006        677         457         320
## mmu-let-7d-5p        312        373        603         274         366
##               HFK_6_miRNA
## mmu-let-7a-5p         539
## mmu-let-7b-3p          16
## mmu-let-7b-5p        3163
## mmu-let-7c-5p        2154
## mmu-let-7d-3p         528
## mmu-let-7d-5p         239
# output the count matrix
ids = isoNorm(ids, formula = ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
rawCountTable <- as.data.frame(counts(ids, norm = TRUE))

# normalization is doing using rlog from DESeq2
pheatmap(counts(ids, norm=TRUE), 
         annotation_col = data.frame(colData(ids)[,1,drop=FALSE]),
         show_rownames = FALSE, scale="row")

pdf("PDF_figure/Pheatmap.pdf",
    width = 5,
    height = 4)
pheatmap(counts(ids, norm=TRUE), 
         annotation_col = data.frame(colData(ids)[,1,drop=FALSE]),
         show_rownames = FALSE, scale="row")
dev.off()
## pdf 
##   3

Differnetial Analysis

# Get a DESeq object using the count matrix
dds <- DESeqDataSetFromMatrix(counts(ids),
                             colData(ids), design = ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
dds_analysis <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotMA(dds_analysis)

pdf("PDF_figure/QC_MAPlot.pdf",
    width = 5,
    height = 4)
plotMA(dds_analysis)
dev.off()
## quartz_off_screen 
##                 2
# transform the dataset for MA plot and PCA plotting
dds_transform <- varianceStabilizingTransformation(dds)

Quality Inspection of the Gene Count Data

Generate raw count table that contains the simple counts of each gene

Data is transformed and pseudocount is calculated.

dds_norm <- estimateSizeFactors(dds)
rawCountTable <- as.data.frame(counts(dds_norm, normalized = TRUE))
rawCountTable_no_norm <- as.data.frame(counts(dds_norm))
pseudoCount = log2(rawCountTable + 1)
grid.arrange(
  ggplot(pseudoCount, aes(x= HF_4_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_4_miR"), 
  ggplot(pseudoCount, aes(x= HF_5_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_5_miR"),
  ggplot(pseudoCount, aes(x= HF_6_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_6_miR"),
  ggplot(pseudoCount, aes(x= HFK_4_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_4_miR"),
  ggplot(pseudoCount, aes(x= HFK_5_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_5_miR"),
  ggplot(pseudoCount, aes(x= HFK_6_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_6_miR"), 
  nrow = 2)

pdf("PDF_figure/QC_histogram.pdf",
    width = 8,
    height = 6)
grid.arrange(
  ggplot(pseudoCount, aes(x= HF_4_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_4_miR"), 
  ggplot(pseudoCount, aes(x= HF_5_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_5_miR"),
  ggplot(pseudoCount, aes(x= HF_6_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HF_6_miR"),
  ggplot(pseudoCount, aes(x= HFK_4_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_4_miR"),
  ggplot(pseudoCount, aes(x= HFK_5_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_5_miR"),
  ggplot(pseudoCount, aes(x= HFK_6_miRNA)) + xlab(expression(log[2](count + 1))) + ylab("Number of miRNAs") + 
    geom_histogram(colour = "white", fill = "#525252", binwidth = 0.6) + labs(title = "HFK_6_miR"), 
  nrow = 2)
dev.off()
## quartz_off_screen 
##                 2
Between-sample distribution

Check on the gene count distribution across all genes.

#Boxplots
suppressMessages(df <- melt(pseudoCount, variable_name = "Samples"))
## Warning in melt(pseudoCount, variable_name = "Samples"): The melt generic in
## data.table has been passed a data.frame and will attempt to redirect to the
## relevant reshape2 method; please note that reshape2 is deprecated, and this
## redirection is now deprecated as well. To continue using melt methods from
## reshape2 while both libraries are attached, e.g. melt.list, you can prepend the
## namespace like reshape2::melt(pseudoCount). In the next version, this warning
## will become an error.
df <- data.frame(df, Condition = substr(df$variable,1,3))

ggplot(df, aes(x=variable, y=value, fill = Condition)) + geom_boxplot() + xlab("") + 
  ylab(expression(log[2](count+1))) + scale_fill_manual(values = c("#619CFF", "#F564E3")) + theme(axis.text.x = element_text(angle = 90, hjust = 1))

pdf("PDF_figure/QC_BoxPlot.pdf",
    width = 5,
    height = 4)
ggplot(df, aes(x=variable, y=value, fill = Condition)) + geom_boxplot() + xlab("") + 
  ylab(expression(log[2](count+1))) + scale_fill_manual(values = c("#619CFF", "#F564E3")) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
dev.off()
## quartz_off_screen 
##                 2
#Histograms and density plots
ggplot(df, aes(x=value, colour = variable, fill = variable)) + ylim(c(0, 0.25)) + 
  geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
  theme(legend.position = "top") + xlab(expression(log[2](count+1)))

pdf("PDF_figure/QC_densityPlot.pdf",
    width = 8,
    height = 6)
ggplot(df, aes(x=value, colour = variable, fill = variable)) + ylim(c(0, 0.25)) + 
  geom_density(alpha = 0.2, size = 1.25) + facet_wrap(~ Condition) +
  theme(legend.position = "top") + xlab(expression(log[2](count+1)))
dev.off()
## quartz_off_screen 
##                 2
Generate MA plots

MA plots are used to check for imbalance in sequencing depth between samples of the same condition. I did not generate MA plot for all library pairs. But the example pairs I selected show that there are imbalance in sequencing depth, but the imbalance is quite random and this is common in RNA-Seq datasets.

## HF4 vs HF5
x = pseudoCount[, 1]
y = pseudoCount[, 2]
## M-values
M = x - y
## A-values
A = (x + y)/2
df = data.frame(A, M)

suppressWarnings(
ggplot(df, aes(x = A, y = M)) + geom_point(size = 1.5, alpha = 1/5) +
  geom_hline(color = "blue3", yintercept = 0) + stat_smooth(se = FALSE, method = "loess", color = "red3") + labs(title = "HF_4_miR vs HF_5_miR"))
## `geom_smooth()` using formula 'y ~ x'

## HF4 vs HF6
x = pseudoCount[, 1]
y = pseudoCount[, 3]
## M-values
M = x - y
## A-values
A = (x + y)/2
df = data.frame(A, M)

suppressWarnings(
ggplot(df, aes(x = A, y = M)) + geom_point(size = 1.5, alpha = 1/5) +
  geom_hline(color = "blue3", yintercept = 0) + stat_smooth(se = FALSE, method = "loess", color = "red3") + labs(title = "HF_4_miR vs HF_5_miR"))
## `geom_smooth()` using formula 'y ~ x'

## HF4 vs HFK4
x = pseudoCount[, 1]
y = pseudoCount[, 4]
## M-values
M = x - y
## A-values
A = (x + y)/2
df = data.frame(A, M)

suppressWarnings(
ggplot(df, aes(x = A, y = M)) + geom_point(size = 1.5, alpha = 1/5) +
  geom_hline(color = "blue3", yintercept = 0) + stat_smooth(se = FALSE, method = "loess", color = "red3") + labs(title = "HF_4_miR vs HFK_4_miR"))
## `geom_smooth()` using formula 'y ~ x'

## HF4 vs HFK5
x = pseudoCount[, 1]
y = pseudoCount[, 5]
## M-values
M = x - y
## A-values
A = (x + y)/2
df = data.frame(A, M)

suppressWarnings(
ggplot(df, aes(x = A, y = M)) + geom_point(size = 1.5, alpha = 1/5) +
  geom_hline(color = "blue3", yintercept = 0) + stat_smooth(se = FALSE, method = "loess", color = "red3") + labs(title = "HF_4_miR vs HFK_5_miR"))
## `geom_smooth()` using formula 'y ~ x'

## HF4 vs HFK6
x = pseudoCount[, 1]
y = pseudoCount[, 6]
## M-values
M = x - y
## A-values
A = (x + y)/2
df = data.frame(A, M)

suppressWarnings(
ggplot(df, aes(x = A, y = M)) + geom_point(size = 1.5, alpha = 1/5) +
  geom_hline(color = "blue3", yintercept = 0) + stat_smooth(se = FALSE, method = "loess", color = "red3") + labs(title = "HF_4_miR vs HFK_6_miR"))
## `geom_smooth()` using formula 'y ~ x'

##### Clustering of the sample-to-sample distances This is the sanity check step to confirm that under a un-supervised clustering, HF and HFK samples will cluster together. For some reason, the code is giving error when try to plot this heatmap in RStudio, so I created a pdf file that contains the heatmap in the Analysis folder named Hierchical Clustering.pnf

rawCountTable_transform <- as.data.frame(assay(dds_transform))
pseudoCount_transform = log2(rawCountTable_transform + 1)
mat.dist = pseudoCount_transform
mat.dist = as.matrix(dist(t(mat.dist)))
mat.dist = mat.dist/max(mat.dist)
setwd("/Users/mizuhi/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/miRNA_Analysis/Analysis")
png('Hierchical_Clustering.png')
cim(mat.dist, symkey = FALSE, margins = c(7, 7))
suppressMessages(dev.off())
## quartz_off_screen 
##                 2
pdf("PDF_figure/Hierchical_Clustering.pdf",
    width = 6,
    height = 6)
cim(mat.dist, symkey = FALSE, margins = c(7, 7))
dev.off()
## quartz_off_screen 
##                 2

Final output is following: Hierchical Clustering

Principal component plot of the samples

I performed PCA analysis on all datasets to confirm that samples from the same condition group together. This step has to be performed using varianceStabelizingTransformed dataset, so that the high variance in genes with low counts will not skew the data.

The top 500 most variable genes are selected for PCA analysis.

plotPCA(dds_transform, intgroup = "condition", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-10,18) + ylim(-10,15)

pdf("PDF_figure/QC_PCAPlot.pdf",
    width = 6,
    height = 4)
plotPCA(dds_transform, intgroup = "condition", ntop = 500) +   
  geom_text(aes(label=name), vjust = 2, hjust = -0.1) + xlim(-10,18) + ylim(-10,15)
dev.off()
## quartz_off_screen 
##                 2
Outout gene counts and DE analysis result
dds_result <- results(dds_analysis, contrast = c("condition", "experimental", "control"))
dds_result <- lfcShrink(dds_analysis, contrast = c("condition", "experimental", "control"), res = dds_result, type = "normal")
## using 'normal' for LFC shrinkage, the Normal prior from Love et al (2014).
## 
## Note that type='apeglm' and type='ashr' have shown to have less bias than type='normal'.
## See ?lfcShrink for more details on shrinkage type, and the DESeq2 vignette.
## Reference: https://doi.org/10.1093/bioinformatics/bty895
write.csv(rawCountTable, "miRNA_counts.csv")
write.csv(as.data.frame(dds_result), "miRNA_de.csv")
Draw heatmap for transcripts that are significantly dysregulated in KRasG12D samples

Genes that were not detected were removed from the list. Genes with padj < 0.05 were considered significantly dysregulated. Their normalized counts were z-scored and used for plotting the heatmap.

suppressMessages(library(mosaic))

dif_analysis <- as.data.frame(results(dds_analysis))
sig_dif <- subset(dif_analysis, dif_analysis$padj < 0.05)
sig_index <- c()
for (i in 1:dim(sig_dif)[1]) {
  sig_index <- c(sig_index ,grep(paste("^", rownames(sig_dif)[i], "$", sep = ""), rownames(pseudoCount)))
}
sig_count <- pseudoCount[sig_index,]
sig_dif <- cbind(sig_dif, sig_count)
for (i in 1:dim(sig_dif)[1]) {
  sig_dif[i,7:12] <- zscore(as.numeric(sig_dif[i,7:12]))
}

my_palette <- colorRampPalette(c("blue", "white", "red"))(256)
heatmap_matrix <- as.matrix(sig_dif[,7:12])

png('HFK vs HF microRNASeq.png',
    width = 600,
    height = 1000,
    res = 100,
    pointsize = 8)
par(cex.main=1.1)
heatmap.2(heatmap_matrix,
          main = "Differentially expressed\nmiRNA in colon tumor\npadj < 0.05",
          density.info = "none",
          key = TRUE,
          lwid = c(3,7),
          lhei = c(1,7),
          labRow = rownames(heatmap_matrix),
          col=my_palette,
          margins = c(14,11),
          symbreaks = TRUE,
          trace = "none",
          dendrogram = "row",
          cexCol = 2,
          Colv = "NA")
dev.off()
## quartz_off_screen 
##                 2
pdf("PDF_figure/Heatmap.pdf",
    width = 7,
    height = 10)
par(cex.main=1.1)
heatmap.2(heatmap_matrix,
          main = "Differentially expressed\nmiRNA in colon tumor\npadj < 0.05",
          density.info = "none",
          key = TRUE,
          lwid = c(3,7),
          lhei = c(1,7),
          labRow = rownames(heatmap_matrix),
          col=my_palette,
          margins = c(14,11),
          symbreaks = TRUE,
          trace = "none",
          dendrogram = "row",
          cexCol = 2,
          Colv = "NA")
dev.off()
## quartz_off_screen 
##                 2

Final output is Heatmap for differential genes

Scatter plot, MA plot and Volcano plot for data visualization
# Scatter plot
dif_analysis$KrasG12D_mean <- rowMeans(pseudoCount[,4:6])
dif_analysis$KrasWT_mean <- rowMeans(pseudoCount[,1:3])
ggplot(dif_analysis, aes(x = KrasWT_mean, y = KrasG12D_mean)) +
  xlab("HF_Average(log2)") + ylab("HFK_Average(log2)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
  labs(title = "HF vs HFK Scatter Plot")

pdf("PDF_figure/ScatterPlot.pdf",
    width = 5,
    height = 4)
ggplot(dif_analysis, aes(x = KrasWT_mean, y = KrasG12D_mean)) +
  xlab("HF_Average(log2)") + ylab("HFK_Average(log2)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
  labs(title = "HF vs HFK Scatter Plot")
dev.off()
## quartz_off_screen 
##                 2
# MA plot
ggplot(dif_analysis, aes(x = log(baseMean,2), y = log2FoldChange)) +
  xlab("Average Expression") + ylab("LFC") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "HF vs HFK MA Plot")

pdf("PDF_figure/MAPlot.pdf",
    width = 5,
    height = 4)
ggplot(dif_analysis, aes(x = log(baseMean,2), y = log2FoldChange)) +
  xlab("Average Expression") + ylab("LFC") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "HF vs HFK MA Plot")
dev.off()
## quartz_off_screen 
##                 2
# Volcano Plot
ggplot(dif_analysis, aes(x = log2FoldChange, y = -log(padj,10))) +
  xlab("LFC") + ylab("-log10(P value)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "HF vs HFK Volcano Plot") + ylim(0,3)
## Warning: Removed 135 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).

pdf("PDF_figure/VolcanoPlot.pdf",
    width = 5,
    height = 4)
ggplot(dif_analysis, aes(x = log2FoldChange, y = -log(padj,10))) +
  xlab("LFC") + ylab("-log10(P value)") + 
  geom_point(data = dif_analysis, alpha = 0.5, size = 1, color = "grey") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange > 0), alpha = 0.5, size = 1, color = "red") +
  geom_point(data = subset(dif_analysis, padj < 0.05 & log2FoldChange < 0), alpha = 0.5, size = 1, color = "blue") +
labs(title = "HF vs HFK Volcano Plot") + ylim(0,3)
## Warning: Removed 135 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).
dev.off()
## quartz_off_screen 
##                 2

Analysis with peaks

Load peaks

peaks.all <- readRDS("~/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/CLIP_Analysis/Data Visualization/Datafiles/peaks-all-12232019.rds")

peaks.filtered <- readRDS("~/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/CLIP_Analysis/Data Visualization/Datafiles/peaks-filtered-12232019.rds")

Load miRNA database

mirna.info <- fread("/Users/mizuhi/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/miRNA_Analysis/Analysis/miR_Family_Info.txt", check.names = TRUE)
mirna.info <- mirna.info[Species.ID == 10090]
mirna.info[MiRBase.ID == "mmu-miR-124-3p.1", MiRBase.ID := "mmu-miR-124-3p"]
mirna.info[miR.family == "miR-124-3p.1", miR.family := "miR-124-3p"]
mirna.info[, seed := gsub("U", "T", Seed.m8)]

Load miRNA counts.

mirna.counts <- rawCountTable_no_norm
mirna.counts$miRNA <- rownames(mirna.counts)
mirna.counts <- as.data.table(mirna.counts)

mirna.counts$count <-
    rowSums(mirna.counts[,-7])
mirna.counts$count.hf <-
    rowSums(mirna.counts[, c("HF_4_miRNA", "HF_5_miRNA", "HF_6_miRNA")])
mirna.counts$count.hfk <-
    rowSums(mirna.counts[, c("HFK_4_miRNA", "HFK_5_miRNA", "HFK_6_miRNA")])
mirna.counts <- mirna.counts[order(-count)]
mirna.counts[, miRNA.shortname := miRNA]
which.to.change <- startsWith(mirna.counts$miRNA.shortname, "mmu-")
mirna.counts[which.to.change, ]$miRNA.shortname <-
    sapply(strsplit(mirna.counts[which.to.change, ]$miRNA.shortname, "mmu-"),
           "[", 2)
which.to.change <- endsWith(mirna.counts$miRNA.shortname, "-5p")
mirna.counts[which.to.change]$miRNA.shortname <-
    sapply(strsplit(mirna.counts[which.to.change]$miRNA.shortname, "-5p"),
           "[", 1)
which.to.change <- endsWith(mirna.counts$miRNA.shortname, "-3p")
mirna.counts[which.to.change]$miRNA.shortname <-
    sapply(strsplit(mirna.counts[which.to.change]$miRNA.shortname, "-3p"),
           "[", 1)
dds <- dds[rowSums(DESeq2::counts(dds)) > 200]
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
plotMA(dds)

mir.dif <- as.data.frame(results(dds, alpha = 0.05, contrast = c("condition", "experimental", "control")))
mir.dif$logP <- -1 * log10(mir.dif$padj)
mir.dif$miRNA <- rownames(mir.dif)
mirna.counts.DGE <- merge(mirna.counts, mir.dif, by = "miRNA", all = TRUE)
setwd("~/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/CLIP_Analysis/Data Visualization/Datafiles")
saveRDS(mirna.counts.DGE, "mirna-counts-deseq-12232019.rds")
mirna.info <- merge(mirna.info, mirna.counts[, .(MiRBase.ID = miRNA,
                                                 HF_4_miRNA, HF_5_miRNA, HF_6_miRNA,
                                                 HFK_4_miRNA, HFK_5_miRNA, HFK_6_miRNA)],
                    by = "MiRBase.ID", all.x = TRUE)


mirna.info[, HF_4_miRNA := ifelse(is.na(HF_4_miRNA), 0, HF_4_miRNA)]
mirna.info[, HF_5_miRNA := ifelse(is.na(HF_5_miRNA), 0, HF_5_miRNA)]
mirna.info[, HF_6_miRNA := ifelse(is.na(HF_6_miRNA), 0, HF_6_miRNA)]
mirna.info[, HFK_4_miRNA := ifelse(is.na(HFK_4_miRNA), 0, HFK_4_miRNA)]
mirna.info[, HFK_5_miRNA := ifelse(is.na(HFK_5_miRNA), 0, HFK_5_miRNA)]
mirna.info[, HFK_6_miRNA := ifelse(is.na(HFK_6_miRNA), 0, HFK_6_miRNA)]


mirna.info[, HF_4_family := sum(HF_4_miRNA),
           by = miR.family]
mirna.info[, HF_5_family := sum(HF_5_miRNA),
           by = miR.family]
mirna.info[, HF_6_family := sum(HF_6_miRNA),
           by = miR.family]
mirna.info[, HFK_4_family := sum(HFK_4_miRNA),
           by = miR.family]
mirna.info[, HFK_5_family := sum(HFK_5_miRNA),
           by = miR.family]
mirna.info[, HFK_6_family := sum(HFK_6_miRNA),
           by = miR.family]

mirna.counts.family <- mirna.info[, .(miR.family, Seed.m8, Family.Conservation., 
                                      HF_4_family, HF_5_family, HF_6_family,
                                      HFK_4_family, HFK_5_family, HFK_6_family)]
mirna.counts.family <- mirna.counts.family[!duplicated(mirna.counts.family)]
setwd("~/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/CLIP_Analysis/Data Visualization/Datafiles")
saveRDS(mirna.counts.family, "mirna-counts-by-family-12232019.rds")
dds.family <- DESeqDataSetFromMatrix(mirna.counts.family[,4:9],
                              colData = data.frame(row.names = colnames(mirna.counts.family[,4:9]),
                                                   condition = c(rep("HF",3), rep("HFK", 3))),
                              design = ~ condition)
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
rownames(dds.family) <- mirna.counts.family$miR.family
dds.family <- dds.family[rowSums(DESeq2::counts(dds.family))>200]
dds.family <- DESeq(dds.family) 
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
mirna.family.res <- results(dds.family, alpha = 0.05, contrast = c("condition", "HFK", "HF"))
mirna.family.DGE <- as.data.frame(mirna.family.res)
mirna.family.DGE$miR.family <- rownames(mirna.family.DGE)
mirna.family.DGE <- mirna.family.DGE[order(-mirna.family.DGE$log2FoldChange),]
setwd("~/OneDrive - Harvard University/Haigis Lab/Projects/Halo-Ago2/Halo-Ago-KRas/Raw Data/HEAP-CLIP/HEAP_12232019/Analysis/CLIP_Analysis/Data Visualization/Datafiles")
saveRDS(mirna.family.DGE, "mirna-counts-deseq-by-family-12232019.rds")
cal.z.score <- function(x){
  (x - mean(x)) / sd(x)
}

mirna.counts.family.norm <- DESeq2::counts(dds.family, normalized = TRUE)
mirna.counts.family.norm.log <- log2(mirna.counts.family.norm + 1)
mirna.counts.family.norm.z <- as.data.frame(t(apply(mirna.counts.family.norm.log, 1, cal.z.score)))
select <- subset(mirna.family.DGE, baseMean > 2000)
select <- select$miR.family
pheatmap(mirna.counts.family.norm.z[select,], 
         cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE,
         border_color = NA, fontsize_row = 8)

pdf("PDF_figure/Pheatmap_family.pdf",
    width = 8,
    height = 5)
pheatmap(mirna.counts.family.norm.z[select,], 
         cluster_rows=TRUE, show_rownames=TRUE, cluster_cols=TRUE,
         border_color = NA, fontsize_row = 8)
dev.off()
## pdf 
##   3
df <- mirna.family.DGE
df$logP <- -log10(mirna.family.DGE$padj)
p3 <- ggplot(data = df, aes(x = log2FoldChange, y = logP, label = miR.family, size = baseMean)) +
  geom_point(alpha = 0.6, colour = "#EC469A", shape = 1) + 
  #geom_point(data = family.highlight, aes(x = log2FoldChange, y = logP), colour = my.colors[1]) + 
  #geom_point(data = df[mir.highlight[1],], aes(x = log2FoldChange, y = logP), colour = my.colors[9], size = dotsize) +
  #geom_point(data = df[df$Row.names %in% mir.highlight[2:6],], aes(x = log2FoldChange, y = logP), colour = my.colors[4], size = dotsize) +
  #geom_point(data = df[mir.highlight[7],], aes(x = log2FoldChange, y = logP), colour = my.colors[2], size = dotsize) +
  #geom_point(data = df[mir.highlight[8:11],], aes(x = log2FoldChange, y = logP), colour = my.colors[4], size = dotsize) +
  #geom_point(data = df[df$Row.names %in% mir.highlight[12:13],], aes(x = log2FoldChange, y = logP), colour = my.colors[6], size = dotsize) +
  #geom_point(data = df[mir.highlight[13],], aes(x = log2FoldChange, y = logP), colour = my.colors[4], size = dotsize) +
  xlab("Fold change log2 (HFK / HF)") +
  ylab("-log10(FDR)") +
  theme_bw() +
  theme(panel.border = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(size=12, margin = margin(t = 10)),
    axis.title.y = element_text(size=12, margin = margin(r = 10)),
    axis.text = element_text(size=10),
    axis.line.y = element_line(size = 0.5),
    axis.line.x = element_line(size = 0.5),
    axis.ticks.x = element_line(size = 0),
    axis.ticks.y = element_line(size = 0.5),
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold"))  + xlim(-3,3)
p4<- ggplotly(p3)
p4
pdf("PDF_figure/VolcanoPlot_expression.pdf",
    width = 5,
    height = 4)
p3
dev.off()
## quartz_off_screen 
##                 2

SessionInfo

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
##  [1] grid      parallel  stats4    stats     graphics  grDevices utils    
##  [8] datasets  methods   base     
## 
## other attached packages:
##  [1] mosaic_1.8.3                ggridges_0.5.3             
##  [3] mosaicData_0.20.2           ggformula_0.10.1           
##  [5] ggstance_0.3.5              Matrix_1.3-4               
##  [7] plotly_4.9.4                CLIPanalyze_0.0.10         
##  [9] GenomicAlignments_1.24.0    Rsamtools_2.4.0            
## [11] GenomicFeatures_1.40.1      AnnotationDbi_1.50.3       
## [13] plyr_1.8.6                  ggrepel_0.9.1              
## [15] Rsubread_2.2.6              Biostrings_2.56.0          
## [17] XVector_0.28.0              rtracklayer_1.48.0         
## [19] forcats_0.5.1               stringr_1.4.0              
## [21] purrr_0.3.4                 tidyr_1.1.3                
## [23] tibble_3.1.2                tidyverse_1.3.1            
## [25] data.table_1.14.0           dplyr_1.0.6                
## [27] readr_1.4.0                 RColorBrewer_1.1-2         
## [29] gplots_3.1.1                mixOmics_6.12.2            
## [31] MASS_7.3-54                 reshape_0.8.8              
## [33] lattice_0.20-44             ggplot2_3.3.3              
## [35] gridExtra_2.3               pheatmap_1.0.12            
## [37] tximport_1.16.1             DESeq2_1.28.1              
## [39] isomiRs_1.16.2              SummarizedExperiment_1.18.2
## [41] DelayedArray_0.14.1         matrixStats_0.59.0         
## [43] Biobase_2.48.0              GenomicRanges_1.40.0       
## [45] GenomeInfoDb_1.24.2         IRanges_2.22.2             
## [47] S4Vectors_0.26.1            BiocGenerics_0.34.0        
## [49] DiscriMiner_0.1-29         
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1                  tidyselect_1.1.1           
##   [3] RSQLite_2.2.7               htmlwidgets_1.5.3          
##   [5] BiocParallel_1.22.0         munsell_0.5.0              
##   [7] withr_2.4.2                 colorspace_2.0-1           
##   [9] biosignals_0.1.0            highr_0.9                  
##  [11] knitr_1.33                  rstudioapi_0.13            
##  [13] assertive.base_0.0-9        labeling_0.4.2             
##  [15] lasso2_1.2-21.1             GenomeInfoDbData_1.2.3     
##  [17] polyclip_1.10-0             mnormt_2.0.2               
##  [19] bit64_4.0.5                 farver_2.1.0               
##  [21] vctrs_0.3.8                 generics_0.1.0             
##  [23] xfun_0.23                   BiocFileCache_1.12.1       
##  [25] R6_2.5.0                    clue_0.3-59                
##  [27] assertive.sets_0.0-3        locfit_1.5-9.4             
##  [29] bitops_1.0-7                cachem_1.0.5               
##  [31] assertthat_0.2.1            scales_1.1.1               
##  [33] gtable_0.3.0                rlang_0.4.11               
##  [35] genefilter_1.70.0           GlobalOptions_0.1.2        
##  [37] splines_4.0.3               lazyeval_0.2.2             
##  [39] mosaicCore_0.9.0            broom_0.7.7                
##  [41] yaml_2.2.1                  reshape2_1.4.4             
##  [43] modelr_0.1.8                crosstalk_1.1.1            
##  [45] backports_1.2.1             tools_4.0.3                
##  [47] psych_2.1.3                 logging_0.10-108           
##  [49] ellipsis_0.3.2              jquerylib_0.1.4            
##  [51] ggdendro_0.1.22             Rcpp_1.0.6                 
##  [53] progress_1.2.2              zlibbioc_1.34.0            
##  [55] RCurl_1.98-1.3              prettyunits_1.1.1          
##  [57] openssl_1.4.4               GetoptLong_1.0.5           
##  [59] cowplot_1.1.1               haven_2.4.1                
##  [61] cluster_2.1.2               fs_1.5.0                   
##  [63] magrittr_2.0.1              RSpectra_0.16-0            
##  [65] circlize_0.4.13             reprex_2.0.0               
##  [67] tmvnsim_1.0-2               hms_1.1.0                  
##  [69] evaluate_0.14               xtable_1.8-4               
##  [71] leaflet_2.0.4.1             XML_3.99-0.6               
##  [73] readxl_1.3.1                shape_1.4.6                
##  [75] compiler_4.0.3              biomaRt_2.44.4             
##  [77] ellipse_0.4.2               KernSmooth_2.23-20         
##  [79] crayon_1.4.1                htmltools_0.5.1.1          
##  [81] mgcv_1.8-36                 corpcor_1.6.9              
##  [83] geneplotter_1.66.0          lubridate_1.7.10           
##  [85] DBI_1.1.1                   tweenr_1.0.2               
##  [87] dbplyr_2.1.1                ComplexHeatmap_2.4.3       
##  [89] rappdirs_0.3.3              cli_2.5.0                  
##  [91] igraph_1.2.6                pkgconfig_2.0.3            
##  [93] xml2_1.3.2                  rARPACK_0.11-0             
##  [95] annotate_1.66.0             bslib_0.2.5.1              
##  [97] rvest_1.0.0                 digest_0.6.27              
##  [99] ConsensusClusterPlus_1.52.0 rmarkdown_2.9              
## [101] cellranger_1.1.0            edgeR_3.30.3               
## [103] curl_4.3.1                  gtools_3.9.2               
## [105] rjson_0.2.20                lifecycle_1.0.0            
## [107] nlme_3.1-152                jsonlite_1.7.2             
## [109] DEGreport_1.24.1            viridisLite_0.4.0          
## [111] askpass_1.1                 limma_3.44.3               
## [113] fansi_0.5.0                 labelled_2.8.0             
## [115] pillar_1.6.1                GGally_2.1.1               
## [117] Nozzle.R1_1.1-1             fastmap_1.1.0              
## [119] httr_1.4.2                  survival_3.2-11            
## [121] glue_1.4.2                  png_0.1-7                  
## [123] bit_4.0.4                   ggforce_0.3.3              
## [125] stringi_1.6.2               sass_0.4.0                 
## [127] blob_1.2.1                  caTools_1.18.2             
## [129] memoise_2.0.0